{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Manifold Learning - How to improve Classification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NumPy plus the scikit-learn modules used by the cells below.\n",
    "import numpy as np\n",
    "from sklearn import datasets, linear_model, manifold, neighbors, random_projection\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset dimensions: (1083, 64)\n"
     ]
    }
   ],
   "source": [
    "# Load the scikit-learn digits dataset restricted to six classes (n_class=6).\n",
    "digits = datasets.load_digits(n_class=6)\n",
    "X_digits, y_digits = digits.data, digits.target\n",
    "\n",
    "# Unpack the row and column counts of the feature matrix and report them.\n",
    "n_samples, n_features = X_digits.shape\n",
    "print('Dataset dimensions:', (n_samples, n_features))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "KNN score: 0.990769\n",
      "LogisticRegression score: 0.993846\n"
     ]
    }
   ],
   "source": [
    "# Hold out 30% of the samples for testing, stratified on the labels, with a fixed seed.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X_digits, y_digits, test_size=0.3, stratify=y_digits, random_state=42\n",
    ")\n",
    "\n",
    "# Baseline classifiers with default settings, trained on the original features only.\n",
    "knn = neighbors.KNeighborsClassifier()\n",
    "logistic = linear_model.LogisticRegression()\n",
    "\n",
    "knn_score = knn.fit(X_train, y_train).score(X_test, y_test)\n",
    "print('KNN score: %f' % knn_score)\n",
    "\n",
    "logistic_score = logistic.fit(X_train, y_train).score(X_test, y_test)\n",
    "print('LogisticRegression score: %f' % logistic_score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(325, 64)"
      ]
     },
     "execution_count": 165,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Dimensions (rows, columns) of the held-out test matrix.\n",
    "np.shape(X_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Try out different manifold-learned features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of learned components appended to the original features.\n",
    "n_additional_features = 3\n",
    "\n",
    "# Alternative transformers to experiment with (uncomment exactly one).\n",
    "# NOTE: MDS and TSNE only provide fit_transform, so they cannot project unseen samples.\n",
    "#transformer = random_projection.SparseRandomProjection(n_additional_features, random_state=42)\n",
    "#transformer = random_projection.GaussianRandomProjection(n_additional_features, random_state=0)\n",
    "#transformer = manifold.MDS(n_additional_features, max_iter=300, n_init=3)\n",
    "#transformer = manifold.TSNE(n_components=n_additional_features, init='pca', random_state=42)\n",
    "\n",
    "# Seed PCA explicitly: its 'auto' solver may select the randomized SVD, which is\n",
    "# not reproducible from run to run without a fixed random_state.\n",
    "transformer = PCA(n_components=n_additional_features, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 167,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the original features first so the transformer is fitted without seeing any test rows.\n",
    "# Same test_size, seed and stratification as the baseline split, so both feature sets\n",
    "# are evaluated on the same samples.\n",
    "X_train_raw, X_test_raw, y_train2, y_test2 = train_test_split(\n",
    "    X_digits, y_digits, test_size=0.3, random_state=42, stratify=y_digits\n",
    ")\n",
    "\n",
    "# Learn the projection on the training rows only, then apply it unchanged to the test rows.\n",
    "# This requires a transformer with a transform() method (e.g. PCA or a random projection).\n",
    "Y_train = transformer.fit_transform(X_train_raw)\n",
    "Y_test = transformer.transform(X_test_raw)\n",
    "\n",
    "# Augmented matrices: the original columns followed by the learned components.\n",
    "X_train2 = np.c_[X_train_raw, Y_train]\n",
    "X_test2 = np.c_[X_test_raw, Y_test]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test accuracy with additional \"manifold\" features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 168,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "KNN score: 0.987692\n",
      "LogisticRegression score: 0.993846\n"
     ]
    }
   ],
   "source": [
    "# Refit both classifiers on the augmented training features and score them on the\n",
    "# augmented held-out rows.\n",
    "knn_score2 = knn.fit(X_train2, y_train2).score(X_test2, y_test2)\n",
    "print('KNN score: %f' % knn_score2)\n",
    "\n",
    "logistic_score2 = logistic.fit(X_train2, y_train2).score(X_test2, y_test2)\n",
    "print('LogisticRegression score: %f' % logistic_score2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test accuracy using the standard features alone"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Comparison point: refit both classifiers on the original features only and score\n",
    "# them on the held-out rows. The estimators are refitted here because other cells\n",
    "# fit the same objects on the augmented features.\n",
    "knn_test_score = knn.fit(X_train, y_train).score(X_test, y_test)\n",
    "print('KNN test score: %f' % knn_test_score)\n",
    "\n",
    "logistic_test_score = logistic.fit(X_train, y_train).score(X_test, y_test)\n",
    "print('LogisticRegression test score: %f' % logistic_test_score)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Training accuracy using the raw features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Score each classifier on the same original-feature rows it was fitted on.\n",
    "# Comparing this with the held-out score shows how much the models overfit.\n",
    "knn_train_score = knn.fit(X_train, y_train).score(X_train, y_train)\n",
    "print('KNN train score: %f' % knn_train_score)\n",
    "\n",
    "logistic_train_score = logistic.fit(X_train, y_train).score(X_train, y_train)\n",
    "print('LogisticRegression train score: %f' % logistic_train_score)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Training accuracy using the additional manifold features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Score each classifier on the same augmented-feature rows it was fitted on.\n",
    "knn_train_score2 = knn.fit(X_train2, y_train2).score(X_train2, y_train2)\n",
    "print('KNN train score: %f' % knn_train_score2)\n",
    "\n",
    "logistic_train_score2 = logistic.fit(X_train2, y_train2).score(X_train2, y_train2)\n",
    "print('LogisticRegression train score: %f' % logistic_train_score2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "429.99635"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Scratch arithmetic: a sample count multiplied by an accuracy value.\n",
    "# NOTE(review): both numbers are hard-coded rather than computed; presumably a sample\n",
    "# count and a KNN accuracy from an earlier run - verify before relying on the result\n",
    "# (X_test.shape reports 325 rows in this notebook, not 434).\n",
    "n_scored_samples = 434\n",
    "knn_accuracy = 0.990775\n",
    "n_scored_samples * knn_accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "425.19197"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Scratch arithmetic: a sample count multiplied by an accuracy value.\n",
    "# NOTE(review): both numbers are hard-coded; the origin of 0.979705 is not visible in\n",
    "# this notebook - presumably a score from an earlier run, confirm or recompute.\n",
    "n_scored_samples = 434\n",
    "other_accuracy = 0.979705\n",
    "n_scored_samples * other_accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sanity check: the leading columns of the augmented test matrix must equal X_test,\n",
    "# i.e. both splits selected the same rows. The result is 0.0 only when they match.\n",
    "# Absolute differences are summed so positive and negative mismatches cannot cancel out.\n",
    "n_original_columns = X_test.shape[1]\n",
    "np.abs(X_test - X_test2[:, :n_original_columns]).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
